scikitlearn lesson_predict_stock

lesson_predict_stock
In [6]:
# pip install ipython==6.1.0 jupyter==1.0.0 matplotlib==2.0.2 
# pip install numpy==1.13.3 pandas==0.20.3 pandas-datareader==0.5.0 scikit-learn==0.19.0
import datetime

import IPython.display
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader
import sklearn
import sklearn.linear_model
import sklearn.model_selection
import sklearn.preprocessing


# Display the image file concept.png, rendered 500 wide, as this cell's output.
IPython.display.Image('concept.png', width=500)
Out[6]:
In [7]:
# Data warehouse: download daily quotes for Apple, Facebook and the GLD gold
# ETF from the 'yahoo' source, starting on 2014-11-01.
df_aapl = pandas_datareader.data.DataReader(
    name='AAPL', data_source='yahoo', start='2014-11-01')
df_fb = pandas_datareader.data.DataReader(
    name='FB', data_source='yahoo', start='2014-11-01')
df_gold = pandas_datareader.data.DataReader(
    name='GLD', data_source='yahoo', start='2014-11-01')
In [8]:
# Show the last two rows of the AAPL frame as a quick check of the download.
df_aapl.tail(2)
Out[8]:
Open High Low Close Adj Close Volume
Date
2017-12-26 170.800003 171.470001 169.679993 170.570007 170.570007 33185500
2017-12-27 170.100006 170.779999 169.710007 170.600006 170.600006 21183000
In [9]:
# Display concept.png again (500 wide) as this cell's output.
IPython.display.Image('concept.png', width=500)
Out[9]:
In [10]:
# Statistics: Simple Moving Average.
# Store the mean of 'Close' over a rolling window of 14 rows in a new 'SMA'
# column, then draw the raw close (red) and its average (green) together.
closing = df_aapl['Close']
df_aapl['SMA'] = closing.rolling(window=14).mean()
for column, colour in (('Close', "red"), ('SMA', "green")):
    df_aapl[column].plot(figsize=(15, 6), color=colour)
plt.show()
In [11]:
# Display concept.png again (500 wide) as this cell's output.
IPython.display.Image('concept.png', width=500)
Out[11]:
In [12]:
# Data mining: add a 'change' column to each frame holding the move from
# open to close, expressed as a percentage of the open.
for quotes in (df_aapl, df_fb, df_gold):
    open_price = quotes['Open']
    quotes['change'] = (quotes['Close'] - open_price) / open_price * 100
# Last two AAPL rows, rounded to two decimals for display.
df_aapl.tail(2).round(2)
Out[12]:
Open High Low Close Adj Close Volume SMA change
Date
2017-12-26 170.8 171.47 169.68 170.57 170.57 33185500 172.60 -0.13
2017-12-27 170.1 170.78 169.71 170.60 170.60 21183000 172.72 0.29
In [13]:
# Data mining: draw the AAPL (red) and FB (blue) closing prices together.
for quotes, colour in ((df_aapl, "red"), (df_fb, "blue")):
    quotes['Close'].plot(figsize=(15, 6), color=colour)
plt.show()
In [14]:
# Data mining: draw the AAPL (red) and GLD (orange) closing prices together.
for quotes, colour in ((df_aapl, "red"), (df_gold, "orange")):
    quotes['Close'].plot(figsize=(15, 6), color=colour)
plt.show()
In [15]:
# Data mining: draw the 'change' column of the three instruments over their
# last 100 rows, with a grid: AAPL red, FB blue, GLD orange.
for quotes, colour in ((df_aapl, "red"), (df_fb, "blue"), (df_gold, "orange")):
    recent_change = quotes['change'].tail(100)
    recent_change.plot(grid=True, figsize=(15, 6), color=colour)
plt.show()
In [16]:
# Display concept.png again (500 wide) as this cell's output.
IPython.display.Image('concept.png', width=500)
Out[16]:
In [17]:
# Display the image file algo.png at its default size as this cell's output.
IPython.display.Image('algo.png')
Out[17]:
In [18]:
# Machine learning: build the regression target.
# Each row's 'label' is the 'Close' value found 30 rows further down, so the
# final 30 rows have no label (NaN), as the 40-row tail below shows.
df_aapl['label'] = df_aapl['Close'].shift(periods=-30)
df_aapl.tail(n=40)
Out[18]:
Open High Low Close Adj Close Volume SMA change label
Date
2017-10-31 167.899994 169.649994 166.940002 169.039993 168.434494 36046800 159.373572 0.678975 172.270004
2017-11-01 169.869995 169.940002 165.610001 166.889999 166.292206 33637800 160.151429 -1.754280 172.220001
2017-11-02 166.600006 168.500000 165.279999 168.110001 167.507828 41393400 160.945715 0.906360 173.970001
2017-11-03 174.000000 174.259995 171.119995 172.500000 171.882111 59398600 161.847143 -0.862069 176.419998
2017-11-06 172.369995 174.990005 171.720001 174.250000 173.625839 35026300 162.831429 1.090680 174.539993
2017-11-07 173.910004 175.250000 173.600006 174.809998 174.183823 24361500 163.906429 0.517506 174.350006
2017-11-08 174.660004 176.240005 174.330002 176.240005 175.608719 24409500 165.353572 0.904615 175.009995
2017-11-09 175.110001 176.100006 173.139999 175.880005 175.250000 29482600 166.755715 0.439726 175.009995
2017-11-10 175.110001 175.380005 174.270004 174.669998 174.669998 25145500 168.077144 -0.251272 170.570007
2017-11-13 173.500000 174.500000 173.399994 173.970001 173.970001 16982100 169.282144 0.270894 170.600006
2017-11-14 173.039993 173.479996 171.179993 171.339996 171.339996 24782500 170.348572 -0.982430 NaN
2017-11-15 169.970001 170.320007 168.380005 169.080002 169.080002 29158100 171.182143 -0.523621 NaN
2017-11-16 171.179993 171.869995 170.300003 171.100006 171.100006 23637500 171.757143 -0.046727 NaN
2017-11-17 171.039993 171.389999 169.639999 170.149994 170.149994 21899500 172.002143 -0.520346 NaN
2017-11-20 170.289993 170.559998 169.559998 169.979996 169.979996 16262400 172.069286 -0.182041 NaN
2017-11-21 170.779999 173.699997 170.779999 173.139999 173.139999 25131300 172.515714 1.381895 NaN
2017-11-22 173.360001 175.000000 173.050003 174.960007 174.960007 25588900 173.005001 0.922938 NaN
2017-11-24 175.100006 175.500000 174.649994 174.970001 174.970001 14026700 173.181429 -0.074246 NaN
2017-11-27 175.050003 175.080002 173.339996 174.089996 174.089996 20716800 173.170000 -0.548419 NaN
2017-11-28 174.300003 174.869995 171.860001 173.070007 173.070007 26428800 173.045715 -0.705678 NaN
2017-11-29 172.630005 172.919998 167.160004 169.479996 169.479996 41666400 172.562857 -1.824717 NaN
2017-11-30 170.429993 172.139999 168.440002 171.850006 171.850006 41527200 172.275000 0.833194 NaN
2017-12-01 169.949997 171.669998 168.500000 171.050003 171.050003 39759300 172.016429 0.647253 NaN
2017-12-04 172.479996 172.619995 169.630005 169.800003 169.800003 32542400 171.718572 -1.553799 NaN
2017-12-05 169.059998 171.520004 168.399994 169.639999 169.639999 27350200 171.597144 0.343074 NaN
2017-12-06 167.500000 170.199997 166.460007 169.009995 169.009995 28560000 171.592143 0.901490 NaN
2017-12-07 169.029999 170.440002 168.910004 169.320007 169.320007 25673300 171.465001 0.171572 NaN
2017-12-08 170.490005 171.000000 168.820007 169.369995 169.369995 23355200 171.409286 -0.656936 NaN
2017-12-11 169.199997 172.889999 168.789993 172.669998 172.669998 35273800 171.601429 2.050828 NaN
2017-12-12 172.149994 172.389999 171.460007 171.699997 171.699997 19409200 171.498572 -0.261398 NaN
2017-12-13 172.500000 173.539993 172.000000 172.270004 172.270004 23818400 171.306429 -0.133331 NaN
2017-12-14 172.399994 173.130005 171.649994 172.220001 172.220001 20476500 171.110000 -0.104404 NaN
2017-12-15 173.630005 174.169998 172.460007 173.970001 173.970001 40169300 171.101429 0.195816 NaN
2017-12-18 174.880005 177.199997 174.860001 176.419998 176.419998 29421100 171.340714 0.880600 NaN
2017-12-19 175.029999 175.389999 174.089996 174.539993 174.539993 27436400 171.702143 -0.279955 NaN
2017-12-20 174.869995 175.419998 173.250000 174.350006 174.350006 23475600 171.880714 -0.297357 NaN
2017-12-21 174.169998 176.020004 174.100006 175.009995 175.009995 20949900 172.163571 0.482286 NaN
2017-12-22 174.679993 175.419998 174.500000 175.009995 175.009995 16349400 172.535713 0.188918 NaN
2017-12-26 170.800003 171.470001 169.679993 170.570007 170.570007 33185500 172.602142 -0.134658 NaN
2017-12-27 170.100006 170.779999 169.710007 170.600006 170.600006 21183000 172.715714 0.293945 NaN
In [19]:
# Machine learning: fit a linear regression that maps one row of features to
# the closing price stored in 'label' (the close 30 rows later).

# Number of trailing rows without a label. It must equal the shift used when
# df_aapl['label'] was created, so it is named once instead of repeated.
forecast_horizon = 30
# Fixed seed: without it train_test_split draws a different split on every
# run and the score printed below cannot be reproduced.
split_seed = 42

# X is every column except the target itself ('label') and 'SMA'.
# NOTE(review): 'SMA' is presumably dropped because a 14-row rolling mean is
# NaN for its first rows - confirm.
X = np.array(df_aapl.drop(['label', 'SMA'], axis=1))
# Features whose magnitudes differ greatly degrade the result, so each column
# is standardised: subtract its mean and divide by its standard deviation.
X = sklearn.preprocessing.scale(X)

# The most recent rows have no label yet; their features are kept aside and
# used later to forecast.
predict_data = X[-forecast_horizon:]
# The remaining rows are the labelled samples.
X = X[:-forecast_horizon]
# Matching labels, with the unlabelled trailing rows removed.
y = np.array(df_aapl['label'])[:-forecast_horizon]

# 80% training data, 20% test data (first argument: inputs, second: labels).
# NOTE(review): the split shuffles the rows of a time series, so neighbouring
# days land on both sides and the score is optimistic. A chronological split
# (shuffle=False) would be a stricter check.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, test_size=0.2, random_state=split_seed)

# Train on the training split.
lr = sklearn.linear_model.LinearRegression()
lr.fit(X_train, y_train)

# Evaluate on the test split. For LinearRegression, score() returns the
# coefficient of determination (R^2), not a classification accuracy; the
# variable name is kept so cells that read `accuracy` still work.
accuracy = lr.score(X_test, y_test)
accuracy
Out[19]:
0.83064400958651796
In [20]:
# Predict: run the fitted model on predict_data, the 30 most recent feature
# rows that were held out of training, and show the resulting array.
predicted_data = lr.predict(predict_data)
predicted_data
Out[20]:
array([ 175.58196487,  173.63475578,  175.50582341,  174.44547705,
        174.16146671,  178.32066966,  179.49107816,  178.70335525,
        177.85169011,  177.23935252,  174.1041429 ,  176.98742683,
        176.22486869,  174.19615171,  174.9557126 ,  174.49545227,
        174.22837868,  173.82354325,  178.40305351,  175.77661636,
        176.70947587,  176.46826015,  178.43126176,  181.17918361,
        178.56940734,  178.37029169,  179.49592288,  178.94580131,
        175.21993924,  175.01249768])
In [21]:
# Append the forecasts to df_aapl as future-dated rows and plot them after
# the historical closing prices.

# Last row that carries a real close. Rows after it can only come from an
# earlier run of this cell and are dropped, so re-running does not stack
# another batch of forecast rows.
last_date = df_aapl['Close'].last_valid_index()
df_aapl = df_aapl.loc[:last_date].copy()
df_aapl['Predict'] = np.nan

# One business day per forecast, starting the day after the last close.
# Dates are built with pandas date arithmetic rather than a Unix-timestamp
# round trip through datetime.fromtimestamp(), which converts to the local
# timezone and shifts the labels off midnight on non-UTC machines.
# Business days are used because the labels were built by shifting rows
# (trading days), not calendar days; market holidays are not accounted for.
future_dates = pd.bdate_range(
    start=last_date + pd.Timedelta(days=1),
    periods=len(predicted_data),
    name=df_aapl.index.name,
)

# Extend the index once (new rows are NaN in every column), then fill only
# 'Predict' by name instead of relying on it being the last column.
df_aapl = df_aapl.reindex(df_aapl.index.append(future_dates))
df_aapl.loc[future_dates, 'Predict'] = predicted_data

df_aapl['Close'].plot(figsize=(15,6), color="green")
df_aapl['Predict'].plot(figsize=(15,6), color="orange")
plt.show()
In [22]:
# Display concept.png again (500 wide) as this cell's output.
IPython.display.Image('concept.png', width=500)
Out[22]:
In [23]:
# Deep learning (no code in this cell; reference link only)
# https://www.tensorflow.org/tutorials/deep_cnn
In [24]:
# Display concept.png again (500 wide) as this cell's output.
IPython.display.Image('concept.png', width=500)
Out[24]:
In [25]:
# Artificial intelligence (AI): a toy trading rule.
# Compare the furthest-out forecast with the most recent real closing price.
# Each value is taken as the last non-missing entry of its column via
# .dropna().iloc[-1]. This avoids integer keys on a date-indexed Series
# (the positional fallback is deprecated in recent pandas) and no longer
# assumes that exactly 30 forecast rows follow the last real close.
last_forecast = df_aapl['Predict'].dropna().iloc[-1]
last_close = df_aapl['Close'].dropna().iloc[-1]

if last_forecast > last_close:
    print('Buy using REST API')
else:
    print('Sell using REST API')
Buy using REST API